Cargando paquetes
# Project data directories, resolved with here().
raw_data <- here("data", "raw")
interim_data <- here("data", "interim")
processed_data <- here("data", "processed")

# Load the final modelling table. file.path() joins the path components
# instead of pasting a hand-written "/" separator onto the directory.
base <- readRDS(file = file.path(processed_data, "base_final.Rds"))

# Recode the target: 0 becomes "bueno", any other non-missing value "malo".
# factor() sorts its levels, so "bueno" precedes "malo" when both occur.
# NOTE(review): mlr3 presumably takes the first level as the positive class
# when `positive` is not supplied -- confirm that "bueno" is the class the
# ppv/tpr measures are meant to refer to.
base <- base %>%
  mutate(objetivo = factor(if_else(objetivo == 0, "bueno", "malo")))
head(base)

# Classification task "tarjetas" with `objetivo` as the target column.
task_tarj <- TaskClassif$new(
  id = "tarjetas",
  backend = base,
  target = "objetivo"
)
print(task_tarj)
<TaskClassif:tarjetas> (14500 x 9)
* Target: objetivo
* Properties: twoclass
* Features (8):
- dbl (4): coeficiente_solvencia, edad, limite_tarjeta_credito, saldo_tarjeta
- fct (4): estado_civil, genero, profesion, tipo_ingresos
#autoplot(task_tarj$select(c("saldo_tarjeta", "coeficiente_solvencia", "edad", "limite_tarjeta_credito")), type = "pairs")
#task_tarj = TaskClassif$new(id = "tarjetas", backend = base, target = "objetivo")
# Base learners, one per algorithm family. Each is created with
# predict_type = "prob" so it outputs class probabilities.
lrn_rpart   <- lrn("classif.rpart",   predict_type = "prob")
lrn_glmnet  <- lrn("classif.glmnet",  predict_type = "prob")
lrn_knn     <- lrn("classif.kknn",    predict_type = "prob")
lrn_lda     <- lrn("classif.lda",     predict_type = "prob")
lrn_nnet    <- lrn("classif.nnet",    predict_type = "prob")
lrn_rf      <- lrn("classif.ranger",  predict_type = "prob")
lrn_svm     <- lrn("classif.svm",     predict_type = "prob")
lrn_xgboost <- lrn("classif.xgboost", predict_type = "prob")
# Fix the RNG seed so the random draw below is reproducible.
set.seed(34678)

# Hold-out row indices: 80% of the task rows are sampled for training and
# the remaining row ids form the test set.
train_set <- sample(x = task_tarj$nrow, size = 0.8 * task_tarj$nrow)
test_set <- setdiff(seq_len(task_tarj$nrow), train_set)

# Classification threshold handed to the "threshold" PipeOp.
# Disabled alternative: use the proportion of the first target class.
# th <- prop.table(table(base$objetivo))[1]
th <- 0.5
# Imputation operators, each restricted to one column type:
# mode imputation for factors, histogram-based imputation for numerics.
impute_fcts <- po(
  "imputemode",
  affect_columns = selector_type("factor")
)
impute_nums <- po(
  "imputehist",
  affect_columns = selector_type("numeric")
)

# Encode factor columns after imputation.
encode <- po(
  "encode",
  affect_columns = selector_type("factor")
)

# Preprocessing graph shared by every learner: impute factors, impute
# numerics, then encode factors.
pre_procesamiento <- impute_fcts %>>% impute_nums %>>% encode

# Thresholding step appended after each learner; uses the cut-off `th`.
threshold <- po("threshold", param_vals = list(thresholds = th))
# Make sure every base learner predicts probabilities before it is wrapped
# in a pipeline. The learners are R6 objects (reference semantics), so
# assigning the field inside the function updates the original object.
invisible(lapply(
  list(
    lrn_rpart, lrn_glmnet, lrn_knn, lrn_lda,
    lrn_nnet, lrn_rf, lrn_svm, lrn_xgboost
  ),
  function(aprendiz) {
    aprendiz$predict_type <- "prob"
  }
))
# Build a GraphLearner for one base learner:
# shared preprocessing graph -> the learner -> thresholding step.
wrap_in_pipeline <- function(base_learner) {
  GraphLearner$new(pre_procesamiento %>>% po(base_learner) %>>% threshold)
}

# Replace each base learner with its pipeline-wrapped version.
lrn_rpart   <- wrap_in_pipeline(lrn_rpart)
lrn_glmnet  <- wrap_in_pipeline(lrn_glmnet)
lrn_knn     <- wrap_in_pipeline(lrn_knn)
lrn_lda     <- wrap_in_pipeline(lrn_lda)
lrn_nnet    <- wrap_in_pipeline(lrn_nnet)
lrn_rf      <- wrap_in_pipeline(lrn_rf)
lrn_svm     <- wrap_in_pipeline(lrn_svm)
lrn_xgboost <- wrap_in_pipeline(lrn_xgboost)
# Store predictions on both the training and the test portion of each
# resampling split, so train and test scores can be compared later.
# The learners are R6 objects, so the assignment inside the function
# modifies the originals in place.
# (lrn_log_reg, the logistic regression learner, is currently disabled.)
invisible(lapply(
  list(
    lrn_rpart, lrn_glmnet, lrn_knn, lrn_lda,
    lrn_nnet, lrn_rf, lrn_svm, lrn_xgboost
  ),
  function(aprendiz) {
    aprendiz$predict_sets <- c("train", "test")
  }
))
# Give every learner a short display id; these ids are the labels shown in
# the benchmark log and results. Learners and labels are paired by position.
# (lrn_log_reg / "Reg-log" is currently disabled.)
invisible(Map(
  function(aprendiz, etiqueta) {
    aprendiz$id <- etiqueta
  },
  list(
    lrn_rpart, lrn_glmnet, lrn_knn, lrn_lda,
    lrn_nnet, lrn_rf, lrn_svm, lrn_xgboost
  ),
  c(
    "Árbol", "Reg-reg", "K-vecinos", "LDA",
    "Red-Neur", "RandomForest", "SVM", "XGBoost"
  )
))
# All learners entering the benchmark, in the order they are compared.
# (lrn_log_reg is currently excluded.)
learners <- list(
  lrn_rpart, lrn_glmnet, lrn_knn, lrn_lda,
  lrn_nnet, lrn_rf, lrn_svm, lrn_xgboost
)
# 10-fold cross-validation, applied to every learner.
resamplings <- rsmp("cv", folds = 10)

# Benchmark design: the single task crossed with all learners and the
# resampling above (8 learners x 10 folds = 80 resampling iterations).
design <- benchmark_grid(task_tarj, learners, resamplings)

# Run the resampling iterations in parallel background R sessions.
# The former "multiprocess" strategy is deprecated/defunct in the future
# package; "multisession" is its replacement and works on every platform.
future::plan("multisession")
bmr <- benchmark(design)
INFO [23:50:10.784] Benchmark with 80 resampling iterations
INFO [23:50:14.063] Applying learner 'XGBoost' on task 'tarjetas' (iter 5/10)
INFO [23:50:16.315] Applying learner 'SVM' on task 'tarjetas' (iter 1/10)
INFO [23:56:22.533] Applying learner 'Reg-reg' on task 'tarjetas' (iter 8/10)
INFO [23:56:27.216] Applying learner 'LDA' on task 'tarjetas' (iter 4/10)
INFO [23:56:31.113] Applying learner 'XGBoost' on task 'tarjetas' (iter 2/10)
INFO [23:56:36.015] Applying learner 'LDA' on task 'tarjetas' (iter 9/10)
INFO [23:56:39.786] Applying learner 'SVM' on task 'tarjetas' (iter 7/10)
INFO [00:02:44.509] Applying learner 'Reg-reg' on task 'tarjetas' (iter 5/10)
INFO [00:02:47.114] Applying learner 'K-vecinos' on task 'tarjetas' (iter 9/10)
INFO [00:03:04.866] Applying learner 'Árbol' on task 'tarjetas' (iter 6/10)
INFO [00:03:07.940] Applying learner 'LDA' on task 'tarjetas' (iter 10/10)
INFO [00:03:10.788] Applying learner 'LDA' on task 'tarjetas' (iter 1/10)
INFO [00:03:13.028] Applying learner 'SVM' on task 'tarjetas' (iter 5/10)
INFO [00:07:25.607] Applying learner 'Red-Neur' on task 'tarjetas' (iter 6/10)
# weights: 88
initial value 8864.344041
iter 10 value 3550.653729
iter 20 value 3525.984748
iter 30 value 3524.750652
iter 40 value 3517.336095
iter 50 value 3504.527029
iter 60 value 3490.270601
iter 70 value 3488.355126
final value 3488.124393
converged
INFO [00:07:29.608] Applying learner 'Red-Neur' on task 'tarjetas' (iter 7/10)
# weights: 88
initial value 9808.686149
final value 3655.011275
converged
INFO [00:07:32.254] Applying learner 'LDA' on task 'tarjetas' (iter 2/10)
INFO [00:07:34.883] Applying learner 'Red-Neur' on task 'tarjetas' (iter 9/10)
# weights: 88
initial value 18674.516815
iter 10 value 3562.604417
final value 3562.604078
converged
INFO [00:07:37.665] Applying learner 'RandomForest' on task 'tarjetas' (iter 6/10)
INFO [00:07:58.589] Applying learner 'Reg-reg' on task 'tarjetas' (iter 4/10)
INFO [00:08:01.589] Applying learner 'LDA' on task 'tarjetas' (iter 3/10)
INFO [23:50:17.833] Applying learner 'LDA' on task 'tarjetas' (iter 8/10)
INFO [23:50:20.946] Applying learner 'Árbol' on task 'tarjetas' (iter 10/10)
INFO [23:50:24.874] Applying learner 'Reg-reg' on task 'tarjetas' (iter 9/10)
INFO [23:50:29.407] Applying learner 'Árbol' on task 'tarjetas' (iter 8/10)
INFO [23:50:32.711] Applying learner 'K-vecinos' on task 'tarjetas' (iter 10/10)
INFO [23:51:07.466] Applying learner 'K-vecinos' on task 'tarjetas' (iter 3/10)
INFO [23:51:48.976] Applying learner 'RandomForest' on task 'tarjetas' (iter 7/10)
INFO [23:52:24.909] Applying learner 'XGBoost' on task 'tarjetas' (iter 6/10)
INFO [23:52:35.543] Applying learner 'RandomForest' on task 'tarjetas' (iter 10/10)
INFO [23:53:07.382] Applying learner 'Red-Neur' on task 'tarjetas' (iter 8/10)
# weights: 88
initial value 5082.615183
final value 3662.311866
converged
INFO [23:53:11.597] Applying learner 'Red-Neur' on task 'tarjetas' (iter 3/10)
# weights: 88
initial value 7879.719197
final value 3659.879369
converged
INFO [23:53:19.135] Applying learner 'Red-Neur' on task 'tarjetas' (iter 10/10)
# weights: 88
initial value 9476.761085
final value 3663.376169
converged
INFO [23:53:30.673] Applying learner 'LDA' on task 'tarjetas' (iter 7/10)
INFO [23:53:36.488] Applying learner 'Reg-reg' on task 'tarjetas' (iter 2/10)
INFO [23:53:41.171] Applying learner 'K-vecinos' on task 'tarjetas' (iter 4/10)
INFO [23:54:21.684] Applying learner 'XGBoost' on task 'tarjetas' (iter 3/10)
INFO [23:54:26.860] Applying learner 'Reg-reg' on task 'tarjetas' (iter 1/10)
INFO [23:54:30.616] Applying learner 'Reg-reg' on task 'tarjetas' (iter 3/10)
INFO [23:54:34.528] Applying learner 'SVM' on task 'tarjetas' (iter 10/10)
INFO [00:00:52.048] Applying learner 'LDA' on task 'tarjetas' (iter 6/10)
INFO [23:50:23.251] Applying learner 'K-vecinos' on task 'tarjetas' (iter 7/10)
INFO [23:50:52.926] Applying learner 'K-vecinos' on task 'tarjetas' (iter 8/10)
INFO [23:51:38.823] Applying learner 'Árbol' on task 'tarjetas' (iter 4/10)
INFO [23:51:45.445] Applying learner 'Red-Neur' on task 'tarjetas' (iter 4/10)
# weights: 88
initial value 8002.400008
final value 3606.101478
converged
INFO [23:51:49.606] Applying learner 'RandomForest' on task 'tarjetas' (iter 2/10)
INFO [23:52:28.852] Applying learner 'RandomForest' on task 'tarjetas' (iter 9/10)
INFO [23:53:02.872] Applying learner 'SVM' on task 'tarjetas' (iter 8/10)
INFO [23:59:07.758] Applying learner 'RandomForest' on task 'tarjetas' (iter 1/10)
INFO [23:59:41.717] Applying learner 'SVM' on task 'tarjetas' (iter 3/10)
INFO [00:04:27.012] Applying learner 'Reg-reg' on task 'tarjetas' (iter 10/10)
INFO [00:04:30.017] Applying learner 'RandomForest' on task 'tarjetas' (iter 4/10)
INFO [00:04:49.971] Applying learner 'Árbol' on task 'tarjetas' (iter 2/10)
INFO [00:04:52.604] Applying learner 'Reg-reg' on task 'tarjetas' (iter 7/10)
INFO [00:04:55.364] Applying learner 'XGBoost' on task 'tarjetas' (iter 8/10)
INFO [00:04:57.694] Applying learner 'XGBoost' on task 'tarjetas' (iter 9/10)
INFO [00:04:59.565] Applying learner 'SVM' on task 'tarjetas' (iter 4/10)
INFO [00:08:56.617] Applying learner 'SVM' on task 'tarjetas' (iter 9/10)
INFO [00:11:04.333] Applying learner 'Red-Neur' on task 'tarjetas' (iter 2/10)
# weights: 88
initial value 8668.465816
final value 3647.701370
converged
INFO [00:11:05.166] Applying learner 'Árbol' on task 'tarjetas' (iter 5/10)
INFO [00:11:06.497] Applying learner 'SVM' on task 'tarjetas' (iter 2/10)
INFO [23:50:29.282] Applying learner 'K-vecinos' on task 'tarjetas' (iter 5/10)
INFO [23:51:05.179] Applying learner 'Árbol' on task 'tarjetas' (iter 3/10)
INFO [23:51:10.309] Applying learner 'XGBoost' on task 'tarjetas' (iter 7/10)
INFO [23:51:14.753] Applying learner 'K-vecinos' on task 'tarjetas' (iter 6/10)
INFO [23:52:07.934] Applying learner 'RandomForest' on task 'tarjetas' (iter 8/10)
INFO [23:52:46.460] Applying learner 'Árbol' on task 'tarjetas' (iter 9/10)
INFO [23:53:00.419] Applying learner 'Árbol' on task 'tarjetas' (iter 1/10)
INFO [23:53:06.782] Applying learner 'XGBoost' on task 'tarjetas' (iter 1/10)
INFO [23:53:11.900] Applying learner 'RandomForest' on task 'tarjetas' (iter 3/10)
INFO [23:53:39.734] Applying learner 'Red-Neur' on task 'tarjetas' (iter 5/10)
# weights: 88
initial value 12571.518812
final value 3650.139041
converged
INFO [23:53:43.521] Applying learner 'RandomForest' on task 'tarjetas' (iter 5/10)
INFO [23:54:09.447] Applying learner 'XGBoost' on task 'tarjetas' (iter 4/10)
INFO [23:54:12.300] Applying learner 'LDA' on task 'tarjetas' (iter 5/10)
INFO [23:54:17.259] Applying learner 'XGBoost' on task 'tarjetas' (iter 10/10)
INFO [23:54:21.053] Applying learner 'SVM' on task 'tarjetas' (iter 6/10)
INFO [00:00:35.066] Applying learner 'Reg-reg' on task 'tarjetas' (iter 6/10)
INFO [00:00:40.833] Applying learner 'Árbol' on task 'tarjetas' (iter 7/10)
INFO [00:00:46.219] Applying learner 'K-vecinos' on task 'tarjetas' (iter 1/10)
INFO [00:01:22.990] Applying learner 'Red-Neur' on task 'tarjetas' (iter 1/10)
# weights: 88
initial value 8686.105637
iter 10 value 3586.420021
iter 10 value 3586.420021
iter 10 value 3586.420021
final value 3586.420021
converged
INFO [00:01:26.143] Applying learner 'K-vecinos' on task 'tarjetas' (iter 2/10)
INFO [00:12:34.718] Finished benchmark
# Misclassification cost matrix: rows are the predicted response, columns
# the true class. Values are filled column by column:
#   predicted "bueno", truth "bueno" ->  0
#   predicted "malo",  truth "bueno" ->  2
#   predicted "bueno", truth "malo"  ->  3
#   predicted "malo",  truth "malo"  -> -1
costs <- matrix(
  c(0, 2, 3, -1),
  nrow = 2,
  dimnames = list(
    response = c("bueno", "malo"),
    truth = c("bueno", "malo")
  )
)

# Stand-alone cost measure with default settings; it is not part of the
# `measures` list below.
cost_measure <- msr("classif.costs", costs = costs)

# Accuracy, positive predictive value, true positive rate and cost. Each
# appears twice: a "*_train" variant scored on the train predictions, and a
# "*_test" variant that leaves predict_sets at its default.
measures <- list(
  msr("classif.acc", id = "acc_train", predict_sets = "train"),
  msr("classif.acc", id = "acc_test"),
  msr("classif.ppv", id = "ppv_train", predict_sets = "train"),
  msr("classif.ppv", id = "ppv_test"),
  msr("classif.tpr", id = "tpr_train", predict_sets = "train"),
  msr("classif.tpr", id = "tpr_test"),
  msr(
    "classif.costs",
    costs = costs, id = "cost_train", predict_sets = "train"
  ),
  msr("classif.costs", costs = costs, id = "cost_test")
)

# Aggregate every measure over the resampling folds, per learner.
bmr$aggregate(measures)